{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 05 Two-way frequency tables and Venn diagrams"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%html\n",
""
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import plotly.graph_objects as go\n",
"import seaborn as sns\n",
"from matplotlib_venn import venn2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import findspark\n",
"\n",
"# findspark.init() runs before the pyspark imports: it is what puts a local\n",
"# Spark installation on sys.path so those imports can resolve.\n",
"findspark.init()\n",
"from pyspark.context import SparkContext\n",
"from pyspark.sql.session import SparkSession\n",
"\n",
"# Single local-mode session, reused as `spark` by the Spark cells below.\n",
"spark = (\n",
"    SparkSession.builder\n",
"    .appName(\"statistics\")\n",
"    .master(\"local\")\n",
"    .getOrCreate()\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Source: [Khan Academy: Two-way frequency tables and Venn diagrams](https://www.khanacademy.org/math/ap-statistics/analyzing-categorical-ap/stats-two-way-tables/v/two-way-frequency-tables-and-venn-diagrams?modal=1)"
]
},
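{
"cell_type": "markdown",
"metadata": {},
"source": [
"Each of the 12 observations below has a yes/no value for `chocolate` and for `coconut`. The following cells count them with pandas, the Spark DataFrame API and Spark SQL, then summarise the counts as a two-way table and a Venn diagram."
]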
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Twelve observations; the two lists are aligned position by position.\n",
"dataset = {\n",
"    \"chocolate\": [\"yes\"] * 9 + [\"no\"] * 3,\n",
"    \"coconut\": [\"yes\"] * 3 + [\"no\"] * 6 + [\"yes\"] + [\"no\"] * 2,\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" chocolate | \n",
" coconut | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" yes | \n",
" yes | \n",
"
\n",
" \n",
" 1 | \n",
" yes | \n",
" yes | \n",
"
\n",
" \n",
" 2 | \n",
" yes | \n",
" yes | \n",
"
\n",
" \n",
" 3 | \n",
" yes | \n",
" no | \n",
"
\n",
" \n",
" 4 | \n",
" yes | \n",
" no | \n",
"
\n",
" \n",
" 5 | \n",
" yes | \n",
" no | \n",
"
\n",
" \n",
" 6 | \n",
" yes | \n",
" no | \n",
"
\n",
" \n",
" 7 | \n",
" yes | \n",
" no | \n",
"
\n",
" \n",
" 8 | \n",
" yes | \n",
" no | \n",
"
\n",
" \n",
" 9 | \n",
" no | \n",
" yes | \n",
"
\n",
" \n",
" 10 | \n",
" no | \n",
" no | \n",
"
\n",
" \n",
" 11 | \n",
" no | \n",
" no | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" chocolate coconut\n",
"0 yes yes\n",
"1 yes yes\n",
"2 yes yes\n",
"3 yes no\n",
"4 yes no\n",
"5 yes no\n",
"6 yes no\n",
"7 yes no\n",
"8 yes no\n",
"9 no yes\n",
"10 no no\n",
"11 no no"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# pandas view of the data: one column per dict key, one row per observation.\n",
"df = pd.DataFrame(data=dataset)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---------+-------+\n",
"|chocolate|coconut|\n",
"+---------+-------+\n",
"| yes| yes|\n",
"| yes| yes|\n",
"| yes| yes|\n",
"| yes| no|\n",
"| yes| no|\n",
"| yes| no|\n",
"| yes| no|\n",
"| yes| no|\n",
"| yes| no|\n",
"| no| yes|\n",
"| no| no|\n",
"| no| no|\n",
"+---------+-------+\n",
"\n"
]
}
],
"source": [
"# Transpose the per-column lists into one tuple per row (materialised as a\n",
"# list for createDataFrame); the dict keys become the column names.\n",
"sdf = spark.createDataFrame(\n",
"    list(zip(*dataset.values())), schema=list(dataset.keys())\n",
")\n",
"# registerTempTable has been deprecated since Spark 2.0 in favour of\n",
"# createOrReplaceTempView; the SQL cells query this view as `sdf_table`.\n",
"sdf.createOrReplaceTempView(\"sdf_table\")\n",
"sdf.show()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"chocolate\n",
"no 3\n",
"yes 9\n",
"dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Marginal counts for `chocolate`: number of rows per answer.\n",
"df.groupby(\"chocolate\").size()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---------+-----+\n",
"|chocolate|count|\n",
"+---------+-----+\n",
"| no| 3|\n",
"| yes| 9|\n",
"+---------+-----+\n",
"\n"
]
}
],
"source": [
"# Same marginal counts for `chocolate`, via the Spark DataFrame API.\n",
"sdf.groupBy(\"chocolate\").count().show()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---------+-----+\n",
"|chocolate|count|\n",
"+---------+-----+\n",
"| no| 3|\n",
"| yes| 9|\n",
"+---------+-----+\n",
"\n"
]
}
],
"source": [
"# Same per-`chocolate` row count, written as Spark SQL against the\n",
"# `sdf_table` name registered from `sdf`.\n",
"spark.sql(\n",
" \"select chocolate, count(*) as count from sdf_table group by chocolate\"\n",
").show()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"coconut\n",
"no 8\n",
"yes 4\n",
"dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Marginal counts for `coconut`: number of rows per answer.\n",
"df.groupby(\"coconut\").size()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-------+-----+\n",
"|coconut|count|\n",
"+-------+-----+\n",
"| no| 8|\n",
"| yes| 4|\n",
"+-------+-----+\n",
"\n"
]
}
],
"source": [
"# Same marginal counts for `coconut`, via the Spark DataFrame API.\n",
"sdf.groupBy(\"coconut\").count().show()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-------+-----+\n",
"|coconut|count|\n",
"+-------+-----+\n",
"| no| 8|\n",
"| yes| 4|\n",
"+-------+-----+\n",
"\n"
]
}
],
"source": [
"# Same per-`coconut` row count, written as Spark SQL against `sdf_table`.\n",
"spark.sql(\"select coconut, count(*) as count from sdf_table group by coconut\").show()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"chocolate coconut\n",
"no no 2\n",
" yes 1\n",
"yes no 6\n",
" yes 3\n",
"dtype: int64"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Joint counts: number of rows for each (chocolate, coconut) combination.\n",
"# The result is a Series indexed by both columns.\n",
"df.groupby([\"chocolate\", \"coconut\"]).size()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---------+-------+-----+\n",
"|chocolate|coconut|count|\n",
"+---------+-------+-----+\n",
"| no| no| 2|\n",
"| no| yes| 1|\n",
"| yes| yes| 3|\n",
"| yes| no| 6|\n",
"+---------+-------+-----+\n",
"\n"
]
}
],
"source": [
"# Joint counts per (chocolate, coconut) combination, via the Spark DataFrame API.\n",
"sdf.groupBy(\"chocolate\", \"coconut\").count().show()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---------+-------+-----+\n",
"|chocolate|coconut|count|\n",
"+---------+-------+-----+\n",
"| no| no| 2|\n",
"| no| yes| 1|\n",
"| yes| yes| 3|\n",
"| yes| no| 6|\n",
"+---------+-------+-----+\n",
"\n"
]
}
],
"source": [
"# Joint counts per (chocolate, coconut) combination, written as Spark SQL\n",
"# against `sdf_table`.\n",
"spark.sql(\n",
" \"select chocolate, coconut, count(*) as count from sdf_table group by chocolate, coconut\"\n",
").show()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" coconut | \n",
" no | \n",
" yes | \n",
" All | \n",
"
\n",
" \n",
" chocolate | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" no | \n",
" 2 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" yes | \n",
" 6 | \n",
" 3 | \n",
" 9 | \n",
"
\n",
" \n",
" All | \n",
" 8 | \n",
" 4 | \n",
" 12 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"coconut no yes All\n",
"chocolate \n",
"no 2 1 3\n",
"yes 6 3 9\n",
"All 8 4 12"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Two-way frequency table: chocolate on the rows, coconut on the columns.\n",
"# margins=True appends the `All` row and column holding the totals.\n",
"pd.crosstab(index=df[\"chocolate\"], columns=df[\"coconut\"], margins=True)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-----------------+---+---+\n",
"|chocolate_coconut| no|yes|\n",
"+-----------------+---+---+\n",
"| yes| 6| 3|\n",
"| no| 2| 1|\n",
"+-----------------+---+---+\n",
"\n"
]
}
],
"source": [
"# Spark version of the two-way table. The first output column is named\n",
"# `chocolate_coconut` and the result carries no totals row or column.\n",
"sdf.stat.crosstab(\"chocolate\", \"coconut\").show()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAQUAAADrCAYAAABgr4PXAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nO3deZxcZZ3v8c9TXV29d6fTJ+ms3SFkIZElFQgMAVzGFRWXO4LrIF5Er4MoIy6lZCxLRWr0uo6zvOaKzqjjdgdFZ7woKDqMgCNgiUAwZO10kk4n1fta1VX1zB/nJLerzdJLnfOcU/V7v171Ir1UPb9uur/9nOc8i9JaI4QQJ4RMFyCE8BcJBSFEEQkFIUQRCQUhRBEJBSFEEQkFIUQRCQUhRBEJBSFEEQkFIUQRCQUhRBEJBSFEEQkFIUQRCQUhRBEJBSFEEQkFIUQRCQUhRBEJBSFEEQkFIUQRCQUhRBEJBSFEEQkFIUQRCQUhRJGw6QJE8CVT6RD2z5ICsrGoJecGBJiScx/EdMlUuhnoACyg9RSPFqAaOwRCQBV2GJyggSwwCYwBI8AoMAgcA44CPUBvLGr1u/8VibmSUKhQyVS6FjgH6MQOgU7n0eJhGRmgFzgE7AaeBZ6NRa1JD2sQM0goVIhkKt0EXAA8B9gMrMWfY0oa6MYOiF3Ak7GoddhsSZVFQqFMJVNpBWwCLgW2YIeAOuOT/KsHeMx5PBmLWlOG6ylrEgplxAmCzcAVzmOx2YpcMQk8ATwKPByLWiOG6yk7EgplIJlKbwKuonyD4HRywOPAA8BvYlErZ7iesiChEFDOQOHzgVcAa4wW4w/DwC+An8aiVrfpYoJMQiFgkqn0Suwg+FOgwXA5fvUkcHcsaj1uupAgklAIiGQqfSFwLXARwR0w9Np+4G7gP2NRq2C6mKCQUPC5ZCq9GXgL9u1EMT+9wA+A+2NRK2u6GL+TUPCpZCq9EXgzEDVdSxkZAL6JHQ7yg38aEgo+k0ylO4AbgG2GSyln+4G7YlHrCdOF+JGEgk8kU+kG4E3Yg4hVhsupFI8CX41FrUOmC/ETCQUfSKbSLwBuxNt1B8KWB34MfEPWXNgkFAxKptLLgZux7ygIs3qBL8ai1pOmCzFNQsGQZCr9KuCtQMR0LeIkDdwLfK2Sew0SCh5LptItwF8CF5uuRZxWL/ClWNT6velCTJBQ8FAylb4YuBVYZLoWcVYa+BHwT5W2pkJCwQPJVLoa+zbjNchsxKDZBfx1LGodN12IVyQUXJZMpS1gB3Cu6VrEvI0An4lFrZTpQrwgoeCiZCq9HjsQKmk5c7kqAF+PRa27TRfiNgkFlyRT6SuxBxTl7kJ5eRD4Qjnv/iSh4IJkKv0G7NmJMn5Qnp4EPhmLWuOmC3GDhEIJJVPpMPbdheeZrkW4bh8Qj0WtQdOFlJqEQok4dxg+jCxkqiQ92MHQY7qQUpJQKIFkKh0Bbge2mq5FeG4A+Fgsau0zXUipSCgsUDKVrgH+Clm/UMnGgB2xqLXHdCGlIKGwAM7mqXHgfNO1CONGgFgsah00XchCSSjMk9ND+AT2gStCAPQDH4pFraOmC1kIPx4b5nvJVLoKiCGBIIotBj6ZTKXbTBeyENJTmIfk4+n3VmdDl9VOVI1XZ0JTobzSoQK6Kq+0/W+FykO+WjMVKYRykULVVLUO5yKF6qnqQk0uoutQEshlrBv4cCxqDZkuZD4kFM4ioRKLsY9iWw+s3nDjddWtF2y8QKHmvWWaRk9NRfRgpj43MdaU06MtuZrxptyifLWuLVnhwrTd2GMMgds9Omy6AL9JqMQy7ANZNzuP9hMfW/bcS7sXX3De6oW2oVDVkaxaEslGaBqM2H9XgHyVHh5vmhocWJINDVrZtny1rltoW8KY9cC7gc+ZLmSupKcAJFRiOXCl81h7qs9p2bi297x3vtlSofn3EOZCowvZ2sKxwSXZTHrZZG
u2rtDsRbui5L4ai1o/MF3EXFRsKCRUogl4CfaU5HPO9LmR1paRLbffHA6Fw8b+ck/W5o/2dkzk+tszK2Q8IlAK2JObArPsuuJCIaESHcCrgBcwmxWMShW23P7uY7VW6zK3a5uNfEiPDLRn+no6xpfkarScJRkMo8D7gjIdumJCIaESW4H/wRxnHp5z3Su62rdf3OlOVfOn0fnxptyRg+vHGicb862m6xFndRC4LQgbwpb9QGNCJdZjb4V24Vyf27Jxbe/Sy7cueGDRDQpV1TBSvfq837YURhZNHTy4cWzxVE2h0XRd4rQ6gLcDXzZdyNmUbU/BuYtwPfbg4Zz3Naiqr53cGr91qqom0lTy4lyg0VP97ZnDh9aNrShUycYuPnZHLGr92nQRZ1J2oZBQiWrgjcBrWUBP6PzbbjrUuHr5qpIV5pGC0uNHOyfSvR0THaZrEac0Atwci1oDpgs5nbIaxU6oxAbgi8C1LCAQVr7kqq4gBgJASKv6FQfqOzY92nKkOhMaNV2P+CNN2KeC+VZZ9BSc3sGbsXsHCwq6SGvLSHTHLTWqKhT4LnhB6YlD68b6+pZnAhlwZe7zsaj1gOkiTiXwPYWESqwEvgD8GSX4eja87XVD5RAIACGt6jp2N65a/7vm7qqc8v2od4V5h3NamO8EOhQSKrEN+Cz2yO6CtZ6/saexY2XZ/VVtHK5e/Zz/ap1qGApXzIEmAdCAfZao7wTy8iGhEgp4PSXcMVlVhfIXf+K24XB9Xdne89foqe71Y71yOeEbGnvuwm7ThUwXuJ5CQiUi2HsZvJkSbqHe8aoXd5dzIIC9EGv17oaVq3Y3HDBdiwDsn993JFNpXx0FEKhQSKhELZAAtpfydSOtLSPLrty2opSv6VcKpZb01K5Z+1RTF5rgdRPLz3nYU+59IzChkFCJBuCTuLAf4oYbymdwcbZa+iOdG1Ith1WBvOlaBG9NptK+WSYfiFBIqEQz8ClgY6lfu2ltx/HGzvIbXJyNhtHwqvVPNB+RHoNxi7HvnvmC70PBWeJ8J6fZ52Ch1vzZy8r2TMDZaBipXr326abA70BcBq5JptK+WPXq61BwBhU/SoluOc7UsGp5X/2K9uVuvHaQtPRHOjt2NXSZrqPC1QPXmC4CfBwKzm3HD2IPxLhizbVXTyilfDXya0pbb23nin31EgxmvdoPYwu+DQXgRuAyt168rt0aaOxYudKt1w+ipYdqO6wjNd2m66hgjcArTBfhy1BIqMSLgVe72cY51758VHoJxRRKrdrT0F47VuXbFXwV4DXOQUPG+C4UnO3S/pebbdRYrUNN53ZWxLyEuVKoyLrfNxdUnooegDWoBXipyQJ8FQrOwOIHmM3eiQvQ+eqXDCrlza7MQVQ9FWpbu7PpiOk6KtjVJhv3VSgAbwPWuNmAClflFm1a1372z6xszQORThlfMGZVMpU2dmixb0IhoRKXAq90u50ll17UEwpXyUlMs7Bqb0N7zXho0HQdFcrYJYQvQiGhEvXALV60tey5l5X9ZrWlorSKnLOzadx0HRVqezKVNrI/qC9CAXsJ9CK3G4ksah6ta7d8cX5DUNSNh1e09kZkfMF7EQwtlDIeCs7dBtcvGwBW/On2PrkNOXer9zQ0yN0II15molHjoQC8E/DkToB1yQVyHuM8VOVDLav2NUhvwXurk6m0K2t+zsRoKCRU4krmcUjLfDSvX9Nb7puouKmtp2ZFZCI0bLqOCnSF1w0aCwVnbcObvGpv2VXbZOPSBVCo6s5djUOm66hAJd1QaDZM9hQuBzw7kq153Rq5dFighuHwSrlF6blVyVTa06MLTYbCdV41VNO2qKw3ZPWKQoVW7W2QSwjvXeplY0ZCIaESFwPnetWetfV8WeBTIk0D1SvDGTVmuo4Ks83Lxkz1FF7nZWOtF5wn6xxKRKGqVhyoT5uuo8JsSqbSnp0o7nkoOCc6eTevOxTK169otzxrrwK0HqtZogrkTNdRQULAJi8b89
oLvWys+dyOtKx1KK2QVvVtR2t6TNdRYcozFJzbkJ5O3bQuvmDCy/YqRdvRWpkZ6q3yDAXgIsDTrnzzuZ3SS3BB3WhVeyhP1nQdFWRDMpX2ZDGf16Hg6aUDQKS12Zcn+wadQlW39tb0mq6jgkTw6I6dZ6GQUIkQcIlX7QFEWltGQ+Gw8d1xy5UllxBe8+QSwsuewnrs3Wo909S5UibauEguITxX8hPSTsXLUIh62BYATWs75AfWRQpVveh4zTHTdVQQT4439DIUtnrYFgANHStk0pLLmvurZb6Cd1Z4cWy9J6HgnBi9wYu2pqtb0ubp5Uolqh8J15uuoYJEgCVuN+JVT2E9Hm2kcoIKh6eq6mvlzoPLIplQK3KcvZdcP9XMq1A4x6N2Tqprt0aUUn7YWaqsKVR1w2hYFpx5x/VxhbINhZrWZhlk9EjTQLWsmvSO9BTmK9LaIhuNeqRxsFrmK3jH9YOMXA+FhEqE8ehWynQ1i1oKXrdZqWomQzKV3Duu7yDmRU9hOeD5ASyRRU3a6zYrVVUuJLNGvVMWoWBkG7RIS5PMUfBIKE89Gglhb7h+apQXoeD6yU+nEm5skFDwiEJVhbNKlqh7oyGZSrv6s122PYVwfZ2rx9mLYjWTVXIHwjuu9ha8uNafc0/hW3zroi662uuoy9zKrf8xn0araiI183meKROZ0dAtn710ez6fDeUL+dC2TS87cst1f/+s6bpmKzIZyo4FaKrYaP/x8LdjN13U172/CaV4zUc+88R5V744KPMtmgHXttr3IhTm/KMSJdp9OZcf+CE/3DLfRlVIBeryoSZSX/j8rQ890lTfms/mJtXNn9l6xaM77z22bfPVgThnoToTCtSsxn/92HvOX3fZ847f9I/3PD6VmVSZsZEg/by4OrDrxeXDnG9XbWJTfwMNC5t8FLCDZEMqRFN9ax5gKpcN5fO5QJ2FG9IqMAONY4P94cPP/L7t+W9770GA6ppa3bh4SZAWdrn6e+tFT8HQVOMA/UY5cvkpbrpz83P7Bg83PG/r6w9csullgeglAKgAzQrp3fNMfV1TS+afb33LlnTXnualazcOXvvxLz9d19QSlN5O4AcajYRCENc9hKuq+dqO3Q/+c3z//fsOP7Ho6f0PuX77qVSUDk4G53M5lT64t2X7G2488P4f/ubBSG1d/idf/Pg603XNgas/21784hjpVmqtA9Odnam1qT23ac3lfY/8/oeuL5MtlQBdPdC2es1kw6K2yQ3bXzgIcOFLX9tzdM/OAA2T4mq/zItQMNQl0wHq0MLxge7IwEhvGGB8ciT09L5fWR3LNo+armu2dID6ZYtXdmYaFy+ZOPR0qgHg2YcfsKyOc0dM1zUHrv5OeTGmMOcBw6/z9a099LRNMhn5NJ9+0eVcvusqruqe04sErKfQ23+g5nPfvjFa0HmltebSzS8/8pLLbgjMVmdaEZzrB+CaD37qqe/ueNfWfC4Xalm6fOxNf33XE6ZrmgNX/+B5EQpDc33C9Vz/24U2qgvB6imcf+5VI1/d8eyDpuuYr6lIIUB9BVh7yRXDt/3g1/9puo55yrj54l78jzQygl7ITrn6jRPFsrX5QE0WCzhXdykv21DITUzIJiseytQWZKWkdwIfCkamjk6NjAdpMkqgaXRhqqYgG7h6YyIWtVz92S7fUBgeCcpElMArhBhHmZqkVnFcv0vixf/IHlweLT2V7KAcDuWVfLggy6a9M+eB+7lyPRTiOp4FjrjdzkyZweEgLXAJtKmawqTpGipIWfQUAPZ71M5Jmf4hz7eAq1SjLblA3f4NONfnrpRtKGQHh2STFY8Mt2blzoN3DrvdgFehcMCjdk6aONbXFOT1D0Gh0fmx5pyR3bUqVNmEwrN4vDCqkMlG8pMZGW102VRE9+sqqk3XUUEOud2AJ6EQ1/EhYJ8XbU03mR4I0iKXQBpvysmdB+/kgF63G/Hy3nLKw7YAGOs+IhOYXDbSmg3UQqiA64lFLdcHdc
s6FEb2d0u31kUanR9Ykg3Mng9lwPXxBPA2FHYCnt7PHtnf3ehle5Vmsj7fm6/WcmScd3Z70YhnoRDX8Rwe9xYy6YHmQj4vqyVd0t+ekcszb+30ohGv56v/wuP21NTQiOvTQiuRRuf7lmXk0sE7Oey7eK7zOhQexeVlnzONHDg07mV7lSJTl+/NV2uZtOSdvbGo5cl2AJ6GgnMJMa8Tn+ar77dPyTWvC/qWyaWDx57xqiETy11/7mVjg3/Ya+lCYcrLNsudVjqTXj65zHQdFaZ8QyGu43vxcCKTzuXDE73p4161VwmG2rJHC2FkbYl3NB4NMoKx05v4Vy8bG3hyl/QUSkSj9ZE143M+NFgsyJ5Y1PJsW0NTofArPJqIAZB+/Mlmr9oqd+NNucOZ+kKQDk4pB4962ZiRUIjruAbu9qq9id50qyyOKo1D68bkjoP3yj8UHL8APLvWH9nfbWSvyHIyUZ87PN6UbzNdR4U5Hotae7xs0FgoOLcnv+dVe70PPS4DYwug0fmu80all+C9h71u0PQOvD/FozsRA0/tWpbPZGUp9TwNWdlDE435xabrqEAPed2g0VBwxhb+AW82YFH9TzzT50E7ZacQ0mMHN4y1m66jAh0D/uB1o6Z7CsR1/Bk8WhNx+GcPya20eTiyZrwvH5bVkAbcF4tanm8paDwUHF8DXF+jMHksvSjTP+j6zjXlJFuTP3Z85eRq03VUoDxwv4mGfREKcR0fBL7iRVtHf/WYLKWeJY3O7980GiJgx8yXicdiUavfRMO+CAWAuI7fjz2pyVW9Dz22TNZCzM6xVZOHxptzluk6KtRPTDXsm1BwfBmXD7soZLKR4b0He9xsoxyMN+QOH1k73mm6jgqVBh431bivQiGu42PAZ3H57Mmue+5rlDMhTi9fpYf3XDgsk5TM+amJAcYTfBUKAHEd3wl8y802xg8fXTx+uNfz8y2DQKPz+54zPCl7LxozAfy7yQJ8FwoAcR3/LvCgm20cuPtemeF4CsdWTR4aXZRbarqOCvbjWNQaNVmAL0PB8UVcnLgxsr97yfjR4zK2MM2AlemScQSjMsA9povwbSg4R9h/HBeXWHd9/ye+/fq9NtIydfDAptEO03VUuHtjUcv4RsO+/qWI6/gI8FHAlenJQ8/ub59MDxx147WDZLwhd3jPhcOrZD6CUVng+6aLAJ+HAkBcx48BH8alW5Vd99xX0XchMrX5o89Gh9pR/v9ZKHP3xaKWL5b3B+IHIa7jPcCHgJKPAQw8tWv5xLG+ihxbyNbke/9w8eBiHSJsupYKNwZ8x3QRJwQiFADiOp4GYkB3qV9799f+b60u6HypX9fPxhtyh3deMri4UCUbsPrAt/0wlnBCYEIBIK7j/djBsKuUrzvec6y173dPlzxs/Gq4Ndu1a+vQcl2FHMBrXjeG5yXMFKhQAIjr+DB2MNxXytfd951/W5HPZI3eH3abRuveVRNdey8Y6ZQxBN/4x1jU8lUvVQV5tm9CJa4G3gGluSZeun1r99rrXlmWy4QLSk92bRztG1yaXWm6FnHSr2NR6w7TRcwU6L8WcR2/F7gdKMmo7bGHf7t68nh/2d2izNTmjz6zbXBKAsFXsni0XcBcBToU4ORaiXdTomnRz371ezXlMuio0bneVRMHdm4bbM/WFppM1yOKfDMWtXy54U+gLx9mSqjE5cBfAAvadu2c615xoH37xWtKUpQhU9WFvr3nDzMhW7L70TPAh0yuhDyTsgoFgIRKNAHvBJ437xdRqrBlx7uP17a1Bm6zUo2e6l+aOdy9YWyVzD/wpQzwnljU8u0q3bILhRMSKrEZeBtw3nyeH2ltGdly+83hUDgciLMONFqPtuQOdW0cXTQllwp+9rexqGVsV6XZKNtQOCGhEtuB64E5D7JZ2y48vO7Nr/H94NxkXb6n67yRiJze5Hv/FYtanzRdxNkEfqDxbOI6/jBwM/C3zHHFZfrR368ceHp3lyuFlUC2Jn9s3+aRnm
e2DS6XQPC9NPAl00XMRtn3FKZLqIQCLgauAaJw9lWBKhye2hp/73B1U4Mvfuk0empk0VRPz5qJetlUNTCyQCwWtXabLmQ2KioUpkuoxGrgFcCVwBmPVq9fvnTggg+8o1GFQsamBeeqCoN9yzNDvR0T7XIwS+B8Lha1PDnwqBQqNhROSKhECLgAuALYzmkCYsllWw6tfcM1K5VSnu05kK8qDA23Tg32LcvUjrROLZX9DgLpnljUust0EXNR8aEwnRMQzwG2AJuBDfD/VxF2vvalB5Y/77I1brWv0blMXeH4UFs2M7Ak0yRzDAIvBXwsFrVc3Z281CQUziChEmFgHXZArAdWbX739bp53ZpzFvraBaXHs7WFoYmGXHasOafGmnMNE425RTpE1UJfW/hCD/A+05uwzoeEwhz9w33Docn6/AfDWbWudjw8FcmE8qECOpRXhAqKUF4pVUCFCkrlw7qQqy7oqUhB5SI6NBUpVE1FCtWZ2nx9rkY3mP5ahGv6sQcWA7l5j4TCPCRT6VrgU9i9ByGmG8EOhIOmC5mvsp+n4IZY1JoEEsAh07UIXxkH4kEOBJBQmDdn+6wPA4H+ARAlkwU+EZS5CGciobAAsag1CHwEOGC4FGFWDvhULGo9ZbqQUpAxhRJIptJNwCeAc03XIjw3iR0IKdOFlIqEQokkU+kG7BOtNpiuRXhmGHseQuAvGaaTUCihZCpdD+zAniEpytsx4KOxqOXasYamSCiUWDKVDmPv/vRi07UI13Rh32Vw5ThD0yQUXJJMpV+LvcmLrFcoL08BdwRxpuJsSSiUgFJqDbAfqNZa5068P5lKXwp8AJBVjeXhx8BXYlErd9bPDDAJhRI4XSgAJFPpNdgnZy/xvDBRKlPA38ei1v2mC/GC7+cpKKVWK6W+r5Q6rpTqU0p9WSkVUkrtUEp1KaWOKaW+rpRqmfacK5VSDyulBpVS3UqpG5z3tzife9x57g6lVMj52A1KqV8ppf63UmpAKbVfKXX1tNc8oJR60bS3P6aU+qbz5ont5QeVUqNKqctPfF4sah0A3gs87Nb3SLjqKPD+SgkE8HkoKKWqsM/Z6wLWYO+z+B3gBufxAmAt0Ah82XlOB3Av8DfYf523AL9zXvJvsPdLWIu92/P12Nf9J1yGfU6lBXwauGuW+yc81/nvIq11o9b6kekfjEWtkVjUuhN7S7jMbL524QuPALfGotY+04V4ydeXD85f3B8By6d3y5VSPwfu1lr/nfP2RuwBoDrsa/hLtdavnfFaVdhz06Na653O+94JvFFr/XynN7FDa73O+Vg99hHhy7XWR5VSB4C3a61/5nz8Y8A6rfVbznT5MFMylV7l1Lh2vt8X4box7DMeHzBdiAm+7ikAq4GuU/yircDuPZzQhX2eZLvznL2neC0Le8OUmc+bvlvzySPjtNbjzj8b51X5acSi1iHgNuAewL+JXLkeBf6iUgMBSnQwq4u6gQ6lVHhGMBwBOqe93YE9/7zXec6lp3itNPaAUSewc9rzZjv5ZAyon/b2smn/ntMvtzN6fVcylX4E+8i7sjzUNmBGgf9TyWFwgt97Cr/B3sEmqZRqUErVKqWuAL4N/KVS6hylVCP23gbfdYLjX4AXKaWuU0qFlVJtSqktWus88D3gDqVUk1KqE3gf8M1TN/1Hfge8QSlVrZS6BHjdtI8dBwrM8ZIgFrV2Au8BvoG9yk6Y8WvgZgkEm69DwflFvgZ7S7SD2PsXvB74KvYv0oPY1/KTwC3Ocw4CL8fuovdj/zJf5LzkLdh/8fcBvwK+5bzWbPwV9oKnAey9FL41rc5x4A7gIeeOx5/M9muMRa1cLGp9D3gX9g+n8M4+4PZY1LojFrX6TRfjF74eaKxEyVQ6CvxP7Lstwh392D3En/n1kFeTJBR8KJlKK+wt59+EjDeUUgb4AXC3s3uWOAUJBR9zwuG5wBuZx1mY4qRx7LkrP5LLhLOTUAiAZCodwp6odS0SDnMxgD3P5d
5Y1BozXUxQSCgEiNNzuAh4JbANnw8UG9QDfB/4eSxqTZkuJmgkFAIqmUovAa4GXsJZzsKsEDnsW9j3Ab+VAcT5k1AIuGQqXY19BuZVwFbA2CG4huwDHgB+6eywLRZIQqGMONvBXYp9knY5B8Q+7OnIDwb9jAU/klAoU8lUug47IC4DLiTYlxiT2JPQHgMeK9dt0PxCQqECOAOUndiDlM/BPjDXzyExBuzBXsb+JPC0DBh6R0KhQiVT6ZXY07Y7nUcH9iIvr/eUnMCevr4beBY7CA7LQKE5EgripGQqXYM9g7IDe6n5IqB1xqNuDi85hb36cBQYwl7F2oO9RP0ocFQGB/1HQkHMSTKVjmAPYFbNeISxexlZ7L/+mVjUkpWfASShIIQoIjPihBBFJBSEEEUkFIQQRSQUhBBFJBSEEEUkFIQQRSQUhKtOHMfncZu/VEq93cs2y4mEgqhoM88IFRIKQogZJBREyZzqhPBpHzvdad4rlFI/Ukr1K6X2KKVumvaxKqXUR5RSe5VSI0qpx5VSq52PbVdKPaqUGnL+u/00NZ2rlHrAqSetlPoXpdQi52PfwF7n8W/OaeEfdN7/J9NOLX9CKfV8V75hfqW1loc8FvzAXv/wBPB5oAGoxd7s5QbshVE3OZ/zLuxj/05Msf8P4O+cz9+CfdrWC52PfQB76fRG7HUVFwFtwGLsTVn/HHvNxRudt9uc5/0S+zBgsA8SejFQg30K+YPAF6bVfQB40bS3VwJ92AcKhZzn9gFLTH+PPft/aboAeZTHA7jc+YUOz3j/DcCeaW/XY5+9uQx7RWYeaJr28TuBf3L+vQt49Sna+nPgNzPe9whwg/Pvk6Fwiue+BkhNe3tmKHwI+MaM5/wUeKvp77FXD78fMCuC43QnhMOM07yVUmCf5t0G9GutR6Z9bhdwybTXPNUJ4jNPHT/xvD/a/l4ptRT4EvYelk3Yf/0HzvB1dALXKqWumfa+auAXZ3hOWZExBVEqJ08In8NzjgCLlVJN0943/STwbuyNYE71vM4Z7zvdCeJ3YvdMLtRaNwNvoXgjmZnLhLuxewqLpj0atNbJWX1FZUBCQZTK6U4IPy2tdTfwMHCn8/kXAjdinxwO8BXgE0qp9cp2ocKrzhkAAADRSURBVFKqDfh/wAal1Juck8Vfj73F3L+fopkm7E1eBpVSK7HHKabrpfi08G8C1yilXuoMdNYqpZ6vlFo1l29GkEkoiJLQpz8h/GzeiH2Y7hHscx7jWuv7nY99Dvge9lkOw8BdQJ3Wug/7QJzbsAcBPwi8UmudPsXrJ7B3th4Cfox9SMx0dwI7nDsN73eC6tXAR7DHSLqxg6RifldkkxUhRJGKST8hxOxIKAghikgoCCGKSCgIIYpIKAghikgoCCGKSCgIIYpIKAghikgoCCGK/DdP6D/JFLaOJgAAAABJRU5ErkJggg==\n",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Look the three Venn regions up by label instead of slicing the groupby\n",
"# result by position, so a missing (chocolate, coconut) combination shows up\n",
"# as 0 rather than shifting the other counts into the wrong region.\n",
"venn_counts = pd.crosstab(df[\"chocolate\"], df[\"coconut\"]).reindex(\n",
"    index=[\"no\", \"yes\"], columns=[\"no\", \"yes\"], fill_value=0\n",
")\n",
"venn2(\n",
"    # Region order expected by venn2: (first set only, second set only, both).\n",
"    subsets=(\n",
"        int(venn_counts.loc[\"no\", \"yes\"]),  # coconut only\n",
"        int(venn_counts.loc[\"yes\", \"no\"]),  # chocolate only\n",
"        int(venn_counts.loc[\"yes\", \"yes\"]),  # both\n",
"    ),\n",
"    # venn2 draws two sets, so exactly two labels are given.\n",
"    set_labels=(\"coconut\", \"chocolate\"),\n",
"    set_colors=(\"purple\", \"skyblue\"),\n",
"    alpha=0.7,\n",
");"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
}